Project information¶

Team: 4¶

Project: 1¶

Dataset: Mimic-iii¶

Team member¶

  • Julio Real Rojas
  • Aqib Nisar
  • Shwetha Vedavinayagam
  • Pham Thien Phuc Nguyen

Import Libraries¶

In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency ,ttest_ind, f_oneway, shapiro, pointbiserialr, zscore, kendalltau, spearmanr, mannwhitneyu, kruskal,wilcoxon
from statsmodels.graphics.gofplots import qqplot
import itertools
import math

#to display all the rows/columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

Import dataset Full data set¶

In [ ]:
file_path = '/content/full_dataset_with_prediction_a.csv'
df_full = pd.read_csv(file_path)
df_full.head()
Out[ ]:
Age Gender Uncomplicated Hypertension Complicated Hypertension Uncomplicated Diabetes Complicated Diabetes Malignancy Hematologic Disease Metastasis Peripheral Vascular Disease Hypothyroidism Chronic Heart Failure Stroke Liver Disease SAPS II SOFA OASIS Sepsis Any Organ Failure Severe Respiratory Failure Severe Coagulation Failure Severe Liver Failure Severe Cardiovascular Failure Severe Central Nervous System Failure Severe Renal Failure Respiratory Dysfunction Cardiovascular Dysfunction Renal Dysfunction Hematologic Dysfunction Metabolic Dysfunction Neurologic Dysfunction Max Heart Rate Min Heart Rate Mean Heart Rate Max MAP Min MAP Mean MAP Max Systolic Pressure Min Systolic Pressure Mean Systolic Pressure Max Diastolic Pressure Min Diastolic Pressure Mean Diastolic Pressure Max Temperature Min Temperature Mean Temperature Max pH Min pH Mean pH Max Glucose Min Glucose Mean Glucose Max WBC Min WBC Mean WBC Max BUN Min BUN Mean BUN Max Creatinine Min Creatinine Mean Creatinine Max Hemoglobin Min Hemoglobin Mean Hemoglobin Hospital Mortality Predicted
0 65 1 1 0 1 0 0 0 0 0 0 0 0 0 67 12 41 0 0 0 0 0 0 1 0 0 0 0 0 0 0 87.0 60.0 71.466667 89.0 57.000000 70.200000 147.0 83.0 108.548387 65.0 43.0 54.548387 38.200001 36.099998 37.310714 7.42 7.29 7.35 175.0 76.0 124.94 21.7 14.6 18.05 23.0 18.0 20.67 1.5 1.1 1.33 11.2 9.3 10.08 0 0
1 24 0 0 0 0 0 0 1 0 0 0 0 0 0 19 1 35 0 1 0 0 0 0 0 0 1 0 0 0 0 0 129.0 69.0 85.459459 143.0 69.000000 89.085714 195.0 120.0 141.500000 127.0 55.0 72.705882 36.222221 35.166668 35.870370 7.57 7.26 7.41 204.0 134.0 169.00 6.3 6.3 6.30 13.0 13.0 13.00 0.7 0.6 0.65 12.6 12.6 12.60 0 1
2 76 1 1 0 1 0 0 0 0 0 0 0 0 0 42 4 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 80.0 53.0 73.258065 80.0 53.000000 66.781250 134.0 89.0 111.593750 66.0 38.0 48.625000 36.500000 35.444444 35.920635 7.46 7.29 7.38 173.0 107.0 149.33 12.9 9.3 10.87 28.0 27.0 27.50 1.0 1.0 1.00 12.0 10.1 10.60 0 0
3 53 1 0 0 0 0 1 0 0 0 0 0 0 1 29 9 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 99.0 53.0 74.454545 107.0 59.000000 73.607841 188.0 90.0 115.480769 104.0 42.0 54.153846 37.099998 36.700001 36.990322 7.49 7.27 7.39 240.0 72.0 157.46 16.6 9.9 14.43 36.0 23.0 29.60 1.3 1.1 1.22 11.1 7.1 10.01 0 0
4 36 1 1 0 1 0 0 0 0 0 0 0 0 0 28 4 42 1 1 0 0 0 0 0 0 0 0 1 0 0 0 156.0 107.0 117.964286 201.0 72.333298 104.595225 162.0 91.0 135.655172 113.0 53.0 78.344828 38.000001 36.666667 37.333334 7.38 7.19 7.28 364.0 141.0 223.00 15.2 8.7 12.13 27.0 21.0 25.00 1.9 1.5 1.68 15.4 13.4 14.33 0 1

Change the Value of 'Gender' Variable¶

In [ ]:
value_map = {'M': 1, 'F': 0} # '0' for female and '1' for Male
df['Gender'] = df['Gender'].replace(value_map)
In [ ]:
df_t.head()
Out[ ]:
Age Gender Uncomplicated Hypertension Complicated Hypertension Uncomplicated Diabetes Complicated Diabetes Malignancy Hematologic Disease Metastasis Peripheral Vascular Disease Hypothyroidism Chronic Heart Failure Stroke Liver Disease SAPS II SOFA OASIS Sepsis Any Organ Failure Severe Respiratory Failure Severe Coagulation Failure Severe Liver Failure Severe Cardiovascular Failure Severe Central Nervous System Failure Severe Renal Failure Respiratory Dysfunction Cardiovascular Dysfunction Renal Dysfunction Hematologic Dysfunction Metabolic Dysfunction Neurologic Dysfunction Max Heart Rate Min Heart Rate Mean Heart Rate Max MAP Min MAP Mean MAP Max Systolic Pressure Min Systolic Pressure Mean Systolic Pressure Max Diastolic Pressure Min Diastolic Pressure Mean Diastolic Pressure Max Temperature Min Temperature Mean Temperature Max pH Min pH Mean pH Max Glucose Min Glucose Mean Glucose Max WBC Min WBC Mean WBC Max BUN Min BUN Mean BUN Max Creatinine Min Creatinine Mean Creatinine Max Hemoglobin Min Hemoglobin Mean Hemoglobin Hospital Mortality Predicted Actual
0 65 1 1 0 1 0 0 0 0 0 0 0 0 0 67 12 41 0 0 0 0 0 0 1 0 0 0 0 0 0 0 87.0 60.0 71.466667 89.0 57.0 70.200000 147.0 83.0 108.548387 65.0 43.0 54.548387 38.200001 36.099998 37.310714 7.42 7.29 7.35 175.0 76.0 124.94 21.7 14.6 18.05 23.0 18.0 20.67 1.5 1.1 1.33 11.2 9.3 10.08 0 0 0
1 76 1 1 0 1 0 0 0 0 0 0 0 0 0 42 4 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 80.0 53.0 73.258065 80.0 53.0 66.781250 134.0 89.0 111.593750 66.0 38.0 48.625000 36.500000 35.444444 35.920635 7.46 7.29 7.38 173.0 107.0 149.33 12.9 9.3 10.87 28.0 27.0 27.50 1.0 1.0 1.00 12.0 10.1 10.60 0 0 0
2 53 1 0 0 0 0 1 0 0 0 0 0 0 1 29 9 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 99.0 53.0 74.454545 107.0 59.0 73.607841 188.0 90.0 115.480769 104.0 42.0 54.153846 37.099998 36.700001 36.990322 7.49 7.27 7.39 240.0 72.0 157.46 16.6 9.9 14.43 36.0 23.0 29.60 1.3 1.1 1.22 11.1 7.1 10.01 0 0 0
3 42 1 1 0 0 0 0 0 0 1 0 0 0 0 24 4 23 0 0 0 0 0 0 0 0 0 0 0 0 0 0 102.0 72.0 90.185185 85.0 58.0 70.148148 128.0 82.0 106.222222 70.0 47.0 56.907407 38.099998 35.799999 37.257408 7.46 7.29 7.39 228.0 75.0 130.88 23.1 9.3 16.20 13.0 13.0 13.00 0.8 0.8 0.80 16.1 7.6 10.56 0 0 0
4 77 1 1 0 0 0 0 0 0 0 0 1 0 0 39 5 31 0 0 0 0 0 0 0 0 0 0 0 0 0 0 94.0 71.0 80.517241 85.0 56.0 71.068966 136.0 86.0 116.000000 65.0 41.0 52.724138 38.200000 36.400000 37.168966 7.44 7.29 7.36 189.0 92.0 132.36 14.6 9.8 11.90 20.0 18.0 19.00 1.0 1.0 1.00 14.1 10.4 11.93 0 0 0

Divide dataset into 6 different datasets of true and fasle values for comparison¶

In [ ]:
df_true_pos = df_full[(df_full['Predicted'] == 1) & (df_full['Hospital Mortality'] == 1)] # to extract true positive observations
df_false_pos = df_full[(df_full['Predicted'] == 1) & (df_full['Hospital Mortality'] == 0)] # to extract false positive observations
df_true_neg = df_full[(df_full['Predicted'] == 0) & (df_full['Hospital Mortality'] == 0)] # to extract true negative observations
df_false_neg = df_full[(df_full['Predicted'] == 0) & (df_full['Hospital Mortality'] == 1)] # to extract false negatibe observations
df_true_values = df_full[((df_full['Predicted'] == 1) & (df_full['Hospital Mortality'] == 1)) | ((df_full['Predicted'] == 0) & (df_full['Hospital Mortality'] == 0))] # to extract true positive and true negative observations
df_false_values = df_full[((df_full['Predicted'] == 1) & (df_full['Hospital Mortality'] == 0)) | ((df_full['Predicted'] == 0) & (df_full['Hospital Mortality'] == 1))] # to extract false positive and false negative observations

Dimentions¶

In [ ]:
df_true_pos.shape
Out[ ]:
(606, 66)
In [ ]:
df_false_pos.shape
Out[ ]:
(195, 66)
In [ ]:
df_true_neg.shape
Out[ ]:
(542, 66)
In [ ]:
df_false_neg.shape
Out[ ]:
(131, 66)
In [ ]:
df_true_values.shape
Out[ ]:
(1148, 66)
In [ ]:
df_false_values.shape
Out[ ]:
(326, 66)

Confusion Matrix¶

In [ ]:
from sklearn.metrics import confusion_matrix
file_path = 'fullpredicted.csv'
df_full = pd.read_csv(file_path)

cm = confusion_matrix(df_full['Actual'], df_full['Predicted'])


# Convert confusion matrix to DataFrame for better visualization
cm_df = pd.DataFrame(cm, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive'])
print("Confusion Matrix:")
print(cm_df)
sns.heatmap(cm_df, annot=True,cmap="OrRd",fmt=',d')
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-12-4eac00ff6edd> in <cell line: 3>()
      1 from sklearn.metrics import confusion_matrix
      2 file_path = 'fullpredicted.csv'
----> 3 df_full = pd.read_csv(file_path)
      4 
      5 cm = confusion_matrix(df_full['Actual'], df_full['Predicted'])

/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    209                 else:
    210                     kwargs[new_arg_name] = new_arg_value
--> 211             return func(*args, **kwargs)
    212 
    213         return cast(F, wrapper)

/usr/local/lib/python3.10/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    329                     stacklevel=find_stack_level(),
    330                 )
--> 331             return func(*args, **kwargs)
    332 
    333         # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no

/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    948     kwds.update(kwds_defaults)
    949 
--> 950     return _read(filepath_or_buffer, kwds)
    951 
    952 

/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in _read(filepath_or_buffer, kwds)
    603 
    604     # Create the parser.
--> 605     parser = TextFileReader(filepath_or_buffer, **kwds)
    606 
    607     if chunksize or iterator:

/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in __init__(self, f, engine, **kwds)
   1440 
   1441         self.handles: IOHandles | None = None
-> 1442         self._engine = self._make_engine(f, self.engine)
   1443 
   1444     def close(self) -> None:

/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py in _make_engine(self, f, engine)
   1733                 if "b" not in mode:
   1734                     mode += "b"
-> 1735             self.handles = get_handle(
   1736                 f,
   1737                 mode,

/usr/local/lib/python3.10/dist-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    854         if ioargs.encoding and "b" not in ioargs.mode:
    855             # Encoding
--> 856             handle = open(
    857                 handle,
    858                 ioargs.mode,

FileNotFoundError: [Errno 2] No such file or directory: 'fullpredicted.csv'

Box Plots True Positive vs False Positive¶

In [ ]:
df_concat = pd.concat([df_true_pos, df_false_pos], keys=['True Positive', 'False Positive'], names=['Group'])

# Define the box_plot function
def box_plot(var, ax):
    sns.boxplot(x='Predicted', y=var, hue='Group', data=df_concat.reset_index(), ax=ax)
    ax.set_title(f'{var}')
    ax.set_xlabel('')
    ax.set_ylabel(var)
    ax.legend().remove()

# List of continuous variables
continuous_var = ["SAPS II", "SOFA", "OASIS","Max Heart Rate", "Min Heart Rate", "Mean Heart Rate", "Max MAP", "Min MAP", "Mean MAP", "Max Systolic Pressure", "Min Systolic Pressure", "Mean Systolic Pressure", "Max Diastolic Pressure", "Min Diastolic Pressure", "Mean Diastolic Pressure", "Max Temperature", "Min Temperature", "Mean Temperature", "Max pH", "Min pH", "Mean pH", "Max Glucose", "Min Glucose", "Mean Glucose", "Max WBC", "Min WBC", "Mean WBC", "Max BUN", "Min BUN", "Mean BUN", "Max Creatinine", "Min Creatinine", "Mean Creatinine", "Max Hemoglobin", "Min Hemoglobin", "Mean Hemoglobin"]

# Calculate number of rows needed
num_rows = math.ceil(len(continuous_var) / 7)

# Create subplots
fig, axes = plt.subplots(num_rows, 7, figsize=(20, num_rows*3))
axes = axes.flatten()

# Iterate over continuous variables and plot in subplots
for i, var in enumerate(continuous_var):
    if i < len(axes):
        box_plot(var, axes[i])
    else:
        break

# Hide any remaining empty subplots
for ax in axes[len(continuous_var):]:
    ax.axis('off')

# Add a general legend
fig.legend(labels=['True Positive', 'False Positive'], loc='lower right', fontsize=16, title='Group')

# Add a general title for the entire scatter plot
plt.suptitle('Comparision between True Positive and False Positive', fontsize=30, fontweight='bold')

plt.tight_layout()
plt.show()
No description has been provided for this image

Shapiro Test for normality¶

In [ ]:
continous_var = ["Age","SAPS II", "SOFA", "OASIS","Max Heart Rate", "Min Heart Rate", "Mean Heart Rate", "Max MAP", "Min MAP", "Mean MAP", "Max Systolic Pressure", "Min Systolic Pressure", "Mean Systolic Pressure", "Max Diastolic Pressure", "Min Diastolic Pressure",
                 "Mean Diastolic Pressure", "Max Temperature", "Min Temperature", "Mean Temperature", "Max pH", "Min pH", "Mean pH", "Max Glucose", "Min Glucose", "Mean Glucose", "Max WBC", "Min WBC", "Mean WBC", "Max BUN", "Min BUN", "Mean BUN", "Max Creatinine", "Min Creatinine", "Mean Creatinine", "Max Hemoglobin", "Min Hemoglobin", "Mean Hemoglobin"]

for col in continous_var:
  statistic, p_value = shapiro(df_full[col])
  print("Shapiro-Wilk Test:")
  print("Test Statistic:", statistic)
  print("P-value:","{:.5f}".format(p_value))
  print()
Shapiro-Wilk Test:
Test Statistic: 0.9437509775161743
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9738647937774658
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9337024688720703
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9931221008300781
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9711896777153015
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9920005202293396
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.986447811126709
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.7188106775283813
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9546191096305847
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.970227062702179
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9645466804504395
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9810664653778076
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9779439568519592
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.8706220984458923
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9882334470748901
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9862073659896851
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.972098171710968
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.901451051235199
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9573802351951599
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9606291651725769
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9509005546569824
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9432029724121094
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9099737405776978
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9211623072624207
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9190303087234497
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9268007874488831
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.912987232208252
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9262189865112305
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.8415319919586182
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.8238466382026672
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.8329563736915588
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.6338029503822327
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.6145962476730347
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.6222485303878784
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9929233193397522
P-value: 0.00000

Shapiro-Wilk Test:
Test Statistic: 0.9937649965286255
P-value: 0.00001

Shapiro-Wilk Test:
Test Statistic: 0.980171263217926
P-value: 0.00000

Density Plots of True Positive and False Positive¶

In [ ]:
def density_plot(x, df_true_pos, df_false_pos, ax):
    sns.kdeplot(df_true_pos[x], fill=True, ax=ax)
    sns.kdeplot(df_false_pos[x], fill=True, ax=ax)

    # Add labels and legend
    ax.set_title(f'{x}')
    ax.set_xlabel('Predicted Scores')
    ax.set_ylabel('Density')
    ax.legend()

# Calculate the number of rows needed based on the number of continuous variables
num_rows = (len(continous_var) + 7) // 7  # Ceiling division

# Create a figure and axis array with the specified number of rows and 6 columns
fig, axes = plt.subplots(num_rows, 7, figsize=(20, num_rows * 3))

# Flatten the axes array for easy iteration
axes = axes.flatten()

# Iterate over the continuous variables and corresponding axes
for x, ax in zip(continous_var, axes):
    # If there are no more variables to plot, hide the axis
    if x is None:
        ax.axis('off')
        continue

    density_plot(x, df_true_pos, df_false_pos, ax)

# Hide any remaining empty subplots
for ax in axes[len(continous_var):]:
    ax.axis('off')

# Add a general legend for the entire scatter plot
plt.figlegend(labels=['True Values', 'False Values'], loc='lower right', fontsize=16)

# Add a general title for the entire scatter plot
plt.suptitle('Density Plots for True Positive and False Positive values of Continuous Variables', fontsize=30, fontweight='bold')

plt.tight_layout()
plt.show()
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image

T-test for True Positive vs False Positive¶

In [ ]:
# Extracting only continuous variables
df_continuous_var = ["Age","SAPS II", "SOFA", "OASIS","Max Heart Rate", "Min Heart Rate", "Mean Heart Rate", "Max MAP", "Min MAP", "Mean MAP", "Max Systolic Pressure", "Min Systolic Pressure", "Mean Systolic Pressure", "Max Diastolic Pressure", "Min Diastolic Pressure","Mean Diastolic Pressure", "Max Temperature", "Min Temperature", "Mean Temperature", "Max pH", "Min pH", "Mean pH", "Max Glucose", "Min Glucose", "Mean Glucose", "Max WBC", "Min WBC", "Mean WBC", "Max BUN", "Min BUN", "Mean BUN", "Max Creatinine", "Min Creatinine", "Mean Creatinine", "Max Hemoglobin", "Min Hemoglobin", "Mean Hemoglobin"]

variables = []
p_values = []

for column in df_continuous_var:
    true_pos = df_true_pos[column]
    false_pos = df_false_pos[column]
    t_statistic, p_value = ttest_ind(true_pos, false_pos)
    print(f"P-value of {column}: {p_value:.5f}")
    variables.append(column)
    p_values.append(p_value)

# Sort variables and p_values based on p_values
sorted_variables, sorted_p_values = zip(*sorted(zip(variables, p_values), key=lambda x: x[1], reverse=True))

# Plotting the p-values in a graph
plt.figure(figsize=(10, 6))
plt.barh(np.arange(len(sorted_variables)), sorted_p_values, color='skyblue')
plt.yticks(np.arange(len(sorted_variables)), sorted_variables)
plt.xlabel('P-value')
plt.title('P-values of T-tests for Continuous Variables (True Positives vs False Positives)')
plt.axvline(0.05, color='r', linestyle='--', label='Significance level (0.05)')
plt.legend()
plt.show()
P-value of Age: 0.05422
P-value of SAPS II: 0.00000
P-value of SOFA: 0.00000
P-value of OASIS: 0.00000
P-value of Max Heart Rate: 0.32121
P-value of Min Heart Rate: 0.08374
P-value of Mean Heart Rate: 0.37215
P-value of Max MAP: 0.08868
P-value of Min MAP: 0.00004
P-value of Mean MAP: 0.00055
P-value of Max Systolic Pressure: 0.06189
P-value of Min Systolic Pressure: 0.00000
P-value of Mean Systolic Pressure: 0.00007
P-value of Max Diastolic Pressure: 0.00336
P-value of Min Diastolic Pressure: 0.00442
P-value of Mean Diastolic Pressure: 0.00366
P-value of Max Temperature: 0.00900
P-value of Min Temperature: 0.00003
P-value of Mean Temperature: 0.00003
P-value of Max pH: 0.00043
P-value of Min pH: 0.00029
P-value of Mean pH: 0.00004
P-value of Max Glucose: 0.08275
P-value of Min Glucose: 0.27195
P-value of Mean Glucose: 0.51764
P-value of Max WBC: 0.01355
P-value of Min WBC: 0.02549
P-value of Mean WBC: 0.02050
P-value of Max BUN: 0.05387
P-value of Min BUN: 0.00996
P-value of Mean BUN: 0.02537
P-value of Max Creatinine: 0.10473
P-value of Min Creatinine: 0.03166
P-value of Mean Creatinine: 0.06553
P-value of Max Hemoglobin: 0.34912
P-value of Min Hemoglobin: 0.22546
P-value of Mean Hemoglobin: 0.35980
No description has been provided for this image

Mann Whitney U Test¶

In [ ]:
variables = []
p_values = []
df_continuous_var = ["Age","SAPS II", "SOFA", "OASIS","Max Heart Rate", "Min Heart Rate", "Mean Heart Rate", "Max MAP", "Min MAP", "Mean MAP", "Max Systolic Pressure", "Min Systolic Pressure", "Mean Systolic Pressure", "Max Diastolic Pressure", "Min Diastolic Pressure","Mean Diastolic Pressure", "Max Temperature", "Min Temperature", "Mean Temperature", "Max pH", "Min pH", "Mean pH", "Max Glucose", "Min Glucose", "Mean Glucose", "Max WBC", "Min WBC", "Mean WBC", "Max BUN", "Min BUN", "Mean BUN", "Max Creatinine", "Min Creatinine", "Mean Creatinine", "Max Hemoglobin", "Min Hemoglobin", "Mean Hemoglobin"]

for col in df_continuous_var:
    true_pos = df_true_pos[col]
    false_pos = df_false_pos[col]
    stat, p_value = mannwhitneyu(true_pos, false_pos)
    print(f"P-value of {col}:", "{:.5f}".format(p_value))
    variables.append(col)
    p_values.append(p_value)

sorted_variables, sorted_p_values = zip(*sorted(zip(variables, p_values), key=lambda x: x[1], reverse=True))

# Plot the p-values
plt.figure(figsize=(10, 6))
plt.barh(np.arange(len(sorted_variables)), sorted_p_values, color='skyblue')
plt.yticks(np.arange(len(sorted_variables)), sorted_variables)
plt.xlabel('P-value')
plt.title('P-values of Mann Whitney for Continuous Variables (True Positives vs False Positives)')
plt.axvline(0.05, color='r', linestyle='--', label='Significance level (0.05)')
plt.legend()
plt.show()
P-value of Age: 0.07877
P-value of SAPS II: 0.00000
P-value of SOFA: 0.00000
P-value of OASIS: 0.00000
P-value of Max Heart Rate: 0.23041
P-value of Min Heart Rate: 0.10210
P-value of Mean Heart Rate: 0.37054
P-value of Max MAP: 0.02056
P-value of Min MAP: 0.00010
P-value of Mean MAP: 0.00029
P-value of Max Systolic Pressure: 0.01719
P-value of Min Systolic Pressure: 0.00000
P-value of Mean Systolic Pressure: 0.00001
P-value of Max Diastolic Pressure: 0.00107
P-value of Min Diastolic Pressure: 0.00196
P-value of Mean Diastolic Pressure: 0.00271
P-value of Max Temperature: 0.02914
P-value of Min Temperature: 0.00003
P-value of Mean Temperature: 0.00014
P-value of Max pH: 0.00409
P-value of Min pH: 0.00161
P-value of Mean pH: 0.00034
P-value of Max Glucose: 0.24350
P-value of Min Glucose: 0.27919
P-value of Mean Glucose: 0.60837
P-value of Max WBC: 0.01624
P-value of Min WBC: 0.06511
P-value of Mean WBC: 0.03238
P-value of Max BUN: 0.06239
P-value of Min BUN: 0.00699
P-value of Mean BUN: 0.02722
P-value of Max Creatinine: 0.01927
P-value of Min Creatinine: 0.00802
P-value of Mean Creatinine: 0.01348
P-value of Max Hemoglobin: 0.19733
P-value of Min Hemoglobin: 0.20942
P-value of Mean Hemoglobin: 0.23240
No description has been provided for this image

Box Plots True Negative vs False Negative¶

In [ ]:
df_concat = pd.concat([df_true_neg, df_false_neg], keys=['True Negative', 'False Negative'], names=['Group'])

# Define the box_plot function
def box_plot(var, ax):
    sns.boxplot(x='Predicted', y=var, hue='Group', data=df_concat.reset_index(), ax=ax)
    ax.set_title(f'{var}')
    ax.set_xlabel('')
    ax.set_ylabel(var)
    ax.legend().remove()

# List of continuous variables
continuous_var = ["Age","SAPS II", "SOFA", "OASIS","Max Heart Rate", "Min Heart Rate", "Mean Heart Rate", "Max MAP", "Min MAP", "Mean MAP", "Max Systolic Pressure", "Min Systolic Pressure", "Mean Systolic Pressure", "Max Diastolic Pressure", "Min Diastolic Pressure", "Mean Diastolic Pressure", "Max Temperature", "Min Temperature", "Mean Temperature", "Max pH", "Min pH", "Mean pH", "Max Glucose", "Min Glucose", "Mean Glucose", "Max WBC", "Min WBC", "Mean WBC", "Max BUN", "Min BUN", "Mean BUN", "Max Creatinine", "Min Creatinine", "Mean Creatinine", "Max Hemoglobin", "Min Hemoglobin", "Mean Hemoglobin"]

# Calculate number of rows needed
num_rows = math.ceil(len(continuous_var) / 7)

# Create subplots
fig, axes = plt.subplots(num_rows, 7, figsize=(20, num_rows*3))
axes = axes.flatten()

# Iterate over continuous variables and plot in subplots
for i, var in enumerate(continuous_var):
    if i < len(axes):
        box_plot(var, axes[i])
    else:
        break

# Hide any remaining empty subplots
for ax in axes[len(continuous_var):]:
    ax.axis('off')

# Add a general legend
fig.legend(labels=['True Negative', 'False Negative'], loc='lower right', fontsize=16, title='Group')

# Add a general title for the entire scatter plot
plt.suptitle('Comparision between True Negative and False Negative', fontsize=30, fontweight='bold')

plt.tight_layout()
plt.show()
No description has been provided for this image

Density Plots of True Negative and False Negative¶

In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt

def density_plot(x, df_true_neg, df_false_neg, ax):
    sns.kdeplot(df_true_neg[x], fill=True, ax=ax)
    sns.kdeplot(df_false_neg[x], fill=True, ax=ax)

    # Add labels and legend
    ax.set_title(f'{x}')
    ax.set_xlabel('Predicted Scores')
    ax.set_ylabel('Density')
    ax.legend()

# Calculate the number of rows needed based on the number of continuous variables
num_rows = (len(continous_var) + 7) // 7  # Ceiling division

# Create a figure and axis array with the specified number of rows and 8 columns
fig, axes = plt.subplots(num_rows, 7, figsize=(20, num_rows * 3))

# Flatten the axes array for easy iteration
axes = axes.flatten()

# Iterate over the continuous variables and corresponding axes
for x, ax in zip(continous_var, axes):
    # If there are no more variables to plot, hide the axis
    if x is None:
        ax.axis('off')
        continue

    density_plot(x, df_true_neg, df_false_neg, ax)

# Hide any remaining empty subplots
for ax in axes[len(continous_var):]:
    ax.axis('off')

# Add a general legend for the entire scatter plot
plt.figlegend(labels=['True Values', 'False Values'], loc='lower right', fontsize=16)

# Add a general title for the entire scatter plot
plt.suptitle('Density Plots for True Negative and False Negative values of Continuous Variables', fontsize=30, fontweight='bold')

plt.tight_layout()
plt.show()
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image

T-test for True Negative vs False Negative¶

In [ ]:
# Extracting only continuous variables
df_continuous_var = ["Age","SAPS II", "SOFA", "OASIS","Max Heart Rate", "Min Heart Rate", "Mean Heart Rate", "Max MAP", "Min MAP", "Mean MAP", "Max Systolic Pressure", "Min Systolic Pressure", "Mean Systolic Pressure", "Max Diastolic Pressure", "Min Diastolic Pressure","Mean Diastolic Pressure", "Max Temperature", "Min Temperature", "Mean Temperature", "Max pH", "Min pH", "Mean pH", "Max Glucose", "Min Glucose", "Mean Glucose", "Max WBC", "Min WBC", "Mean WBC", "Max BUN", "Min BUN", "Mean BUN", "Max Creatinine", "Min Creatinine", "Mean Creatinine", "Max Hemoglobin", "Min Hemoglobin", "Mean Hemoglobin"]

variables = []
p_values = []

for column in df_continuous_var:
    true_neg = df_true_neg[column]
    false_neg = df_false_neg[column]
    t_statistic, p_value = ttest_ind(true_neg, false_neg)
    print(f"P-value of {column}: {p_value:.5f}")
    variables.append(column)
    p_values.append(p_value)

# Sort variables and p_values based on p_values
sorted_variables, sorted_p_values = zip(*sorted(zip(variables, p_values), key=lambda x: x[1], reverse=True))

# Plotting the p-values in a graph
plt.figure(figsize=(10, 6))
plt.barh(np.arange(len(sorted_variables)), sorted_p_values, color='skyblue')
plt.yticks(np.arange(len(sorted_variables)), sorted_variables)
plt.xlabel('P-value')
plt.title('P-values of T-tests for Continuous Variables (True Negatives vs False Negatives)')
plt.axvline(0.05, color='r', linestyle='--', label='Significance level (0.05)')
plt.legend()
plt.show()
P-value of Age: 0.03328
P-value of SAPS II: 0.00019
P-value of SOFA: 0.32780
P-value of OASIS: 0.04823
P-value of Max Heart Rate: 0.56985
P-value of Min Heart Rate: 0.14249
P-value of Mean Heart Rate: 0.46821
P-value of Max MAP: 0.02506
P-value of Min MAP: 0.87321
P-value of Mean MAP: 0.00000
P-value of Max Systolic Pressure: 0.00000
P-value of Min Systolic Pressure: 0.06807
P-value of Mean Systolic Pressure: 0.00000
P-value of Max Diastolic Pressure: 0.04111
P-value of Min Diastolic Pressure: 0.65160
P-value of Mean Diastolic Pressure: 0.01598
P-value of Max Temperature: 0.13346
P-value of Min Temperature: 0.34189
P-value of Mean Temperature: 0.82163
P-value of Max pH: 0.49452
P-value of Min pH: 0.48933
P-value of Mean pH: 0.41530
P-value of Max Glucose: 0.90527
P-value of Min Glucose: 0.00262
P-value of Mean Glucose: 0.21587
P-value of Max WBC: 0.32148
P-value of Min WBC: 0.07676
P-value of Mean WBC: 0.15066
P-value of Max BUN: 0.06567
P-value of Min BUN: 0.08726
P-value of Mean BUN: 0.07487
P-value of Max Creatinine: 0.02917
P-value of Min Creatinine: 0.04930
P-value of Mean Creatinine: 0.03102
P-value of Max Hemoglobin: 0.00875
P-value of Min Hemoglobin: 0.00200
P-value of Mean Hemoglobin: 0.19756
No description has been provided for this image

Boxplots of True values with False values¶

In [ ]:
df_concat = pd.concat([df_true_values, df_false_values], keys=['True', 'False'], names=['Group'])

# Define the box_plot function
def box_plot(var, ax):
    sns.boxplot(x='Predicted', y=var, hue='Group', data=df_concat.reset_index(), ax=ax)
    ax.set_title(f'{var}')
    ax.set_xlabel('')
    ax.set_ylabel(var)
    ax.legend().remove()

# List of continuous variables
continuous_var = ["SAPS II", "SOFA", "OASIS","Max Heart Rate", "Min Heart Rate", "Mean Heart Rate", "Max MAP", "Min MAP", "Mean MAP", "Max Systolic Pressure", "Min Systolic Pressure", "Mean Systolic Pressure", "Max Diastolic Pressure", "Min Diastolic Pressure", "Mean Diastolic Pressure", "Max Temperature", "Min Temperature", "Mean Temperature", "Max pH", "Min pH", "Mean pH", "Max Glucose", "Min Glucose", "Mean Glucose", "Max WBC", "Min WBC", "Mean WBC", "Max BUN", "Min BUN", "Mean BUN", "Max Creatinine", "Min Creatinine", "Mean Creatinine", "Max Hemoglobin", "Min Hemoglobin", "Mean Hemoglobin"]

# Calculate number of rows needed
num_rows = math.ceil(len(continuous_var) / 7)

# Create subplots
fig, axes = plt.subplots(num_rows, 7, figsize=(20, num_rows*3))
axes = axes.flatten()

# Iterate over continuous variables and plot in subplots
for i, var in enumerate(continuous_var):
    if i < len(axes):
        box_plot(var, axes[i])
    else:
        break

# Hide any remaining empty subplots
for ax in axes[len(continuous_var):]:
    ax.axis('off')

# Add a general legend
fig.legend(labels=['True', 'False'], loc='lower right', fontsize=16, title='Group')

# Add a general title for the entire scatter plot
plt.suptitle('Comparision between True Values and False Values', fontsize=30, fontweight='bold')

plt.tight_layout()
plt.show()
No description has been provided for this image

Density Plots of True values and False Values¶

In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt

def density_plot(x, df_true_values, df_false_values, ax):
    sns.kdeplot(df_true_values[x], fill=True, ax=ax)
    sns.kdeplot(df_false_values[x], fill=True, ax=ax)

    # Add labels and legend
    ax.set_title(f'{x}')
    ax.set_xlabel('Predicted Scores')
    ax.set_ylabel('Density')
    ax.legend()

# Calculate the number of rows needed based on the number of continuous variables
num_rows = (len(continous_var) + 6) // 7  # Ceiling division

# Create a figure and axis array with the specified number of rows and 8 columns
fig, axes = plt.subplots(num_rows, 7, figsize=(20, num_rows * 3))

# Flatten the axes array for easy iteration
axes = axes.flatten()

# Iterate over the continuous variables and corresponding axes
for x, ax in zip(continous_var, axes):
    # If there are no more variables to plot, hide the axis
    if x is None:
        ax.axis('off')
        continue

    density_plot(x, df_true_values, df_false_values, ax)

# Hide any remaining empty subplots
for ax in axes[len(continous_var):]:
    ax.axis('off')

# Add a general legend for the entire scatter plot
plt.figlegend(labels=['True Values', 'False Values'], loc='lower right', fontsize=16)

# Add a general title for the entire scatter plot
plt.suptitle('Density Plots for True and False values of Continuous Variables', fontsize=30, fontweight='bold')

plt.tight_layout()
plt.show()
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image

T-test for True values vs False values¶

In [ ]:
# Extracting only continuous variables
df_continuous_var = ["Age","SAPS II", "SOFA", "OASIS","Max Heart Rate", "Min Heart Rate", "Mean Heart Rate", "Max MAP", "Min MAP", "Mean MAP", "Max Systolic Pressure", "Min Systolic Pressure", "Mean Systolic Pressure", "Max Diastolic Pressure", "Min Diastolic Pressure","Mean Diastolic Pressure", "Max Temperature", "Min Temperature", "Mean Temperature", "Max pH", "Min pH", "Mean pH", "Max Glucose", "Min Glucose", "Mean Glucose", "Max WBC", "Min WBC", "Mean WBC", "Max BUN", "Min BUN", "Mean BUN", "Max Creatinine", "Min Creatinine", "Mean Creatinine", "Max Hemoglobin", "Min Hemoglobin", "Mean Hemoglobin"]

variables = []
p_values = []

for column in df_continuous_var:
    true_pos = df_true_values[column]
    false_pos = df_false_values[column]
    t_statistic, p_value = ttest_ind(true_pos, false_pos)
    print(f"P-value of {column}: {p_value:.5f}")
    variables.append(column)
    p_values.append(p_value)

# Sort variables and p_values based on p_values
sorted_variables, sorted_p_values = zip(*sorted(zip(variables, p_values), key=lambda x: x[1], reverse=True))

# Plotting the p-values in a graph
plt.figure(figsize=(10, 6))
plt.barh(np.arange(len(sorted_variables)), sorted_p_values, color='skyblue')
plt.yticks(np.arange(len(sorted_variables)), sorted_variables)
plt.xlabel('P-value')
plt.title('P-values of T-tests for Continuous Variables (True Values vs False Values)')
plt.axvline(0.05, color='r', linestyle='--', label='Significance level (0.05)')
plt.legend()
plt.show()
P-value of Age: 0.61497
P-value of SAPS II: 0.14265
P-value of SOFA: 0.00003
P-value of OASIS: 0.07948
P-value of Max Heart Rate: 0.93380
P-value of Min Heart Rate: 0.34607
P-value of Mean Heart Rate: 0.52893
P-value of Max MAP: 0.00511
P-value of Min MAP: 0.00296
P-value of Mean MAP: 0.00000
P-value of Max Systolic Pressure: 0.00005
P-value of Min Systolic Pressure: 0.00001
P-value of Mean Systolic Pressure: 0.00000
P-value of Max Diastolic Pressure: 0.00016
P-value of Min Diastolic Pressure: 0.04571
P-value of Mean Diastolic Pressure: 0.00031
P-value of Max Temperature: 0.00379
P-value of Min Temperature: 0.00248
P-value of Mean Temperature: 0.00026
P-value of Max pH: 0.02291
P-value of Min pH: 0.01006
P-value of Mean pH: 0.00031
P-value of Max Glucose: 0.33188
P-value of Min Glucose: 0.00437
P-value of Mean Glucose: 0.55742
P-value of Max WBC: 0.14600
P-value of Min WBC: 0.34880
P-value of Mean WBC: 0.23963
P-value of Max BUN: 0.97997
P-value of Min BUN: 0.51364
P-value of Mean BUN: 0.77214
P-value of Max Creatinine: 0.83034
P-value of Min Creatinine: 0.74198
P-value of Mean Creatinine: 0.98873
P-value of Max Hemoglobin: 0.25010
P-value of Min Hemoglobin: 0.00259
P-value of Mean Hemoglobin: 0.13727
No description has been provided for this image

Categorical - Mode¶

In [ ]:
# Get the mode of the Gender column in the True Positive group
mode_gender = df_concat.loc['True Positive', 'Complicated Diabetes'].mode()[0]

# Print the mode
print(mode_gender)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3801             try:
-> 3802                 return self._engine.get_loc(casted_key)
   3803             except KeyError as err:

/usr/local/lib/python3.10/dist-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

/usr/local/lib/python3.10/dist-packages/pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'True Positive'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
<ipython-input-119-058b76b7089a> in <cell line: 2>()
      1 # Get the mode of the Gender column in the True Positive group
----> 2 mode_gender = df_concat.loc['True Positive', 'Complicated Diabetes'].mode()[0]
      3 
      4 # Print the mode
      5 print(mode_gender)

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in __getitem__(self, key)
   1065             if self._is_scalar_access(key):
   1066                 return self.obj._get_value(*key, takeable=self._takeable)
-> 1067             return self._getitem_tuple(key)
   1068         else:
   1069             # we by definition only have the 0th axis

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
   1245         with suppress(IndexingError):
   1246             tup = self._expand_ellipsis(tup)
-> 1247             return self._getitem_lowerdim(tup)
   1248 
   1249         # no multi-index, so validate all of the indexers

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in _getitem_lowerdim(self, tup)
    965                 # We don't need to check for tuples here because those are
    966                 #  caught by the _is_nested_tuple_indexer check above.
--> 967                 section = self._getitem_axis(key, axis=i)
    968 
    969                 # We should never have a scalar section here, because

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1310         # fall thru to straight lookup
   1311         self._validate_key(key, axis)
-> 1312         return self._get_label(key, axis=axis)
   1313 
   1314     def _get_slice_axis(self, slice_obj: slice, axis: int):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in _get_label(self, label, axis)
   1258     def _get_label(self, label, axis: int):
   1259         # GH#5567 this will fail if the label is not present in the axis.
-> 1260         return self.obj.xs(label, axis=axis)
   1261 
   1262     def _handle_lowerdim_multi_index_axis0(self, tup: tuple):

/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
   4047 
   4048         if isinstance(index, MultiIndex):
-> 4049             loc, new_index = index._get_loc_level(key, level=0)
   4050             if not drop_level:
   4051                 if lib.is_integer(loc):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/multi.py in _get_loc_level(self, key, level)
   3158                 return indexer, maybe_mi_droplevels(indexer, ilevels)
   3159         else:
-> 3160             indexer = self._get_level_indexer(key, level=level)
   3161             if (
   3162                 isinstance(key, str)

/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/multi.py in _get_level_indexer(self, key, level, indexer)
   3261         else:
   3262 
-> 3263             idx = self._get_loc_single_level_index(level_index, key)
   3264 
   3265             if level > 0 or self._lexsort_depth == 0:

/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/multi.py in _get_loc_single_level_index(self, level_index, key)
   2847             return -1
   2848         else:
-> 2849             return level_index.get_loc(key)
   2850 
   2851     def get_loc(self, key, method=None):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3802                 return self._engine.get_loc(casted_key)
   3803             except KeyError as err:
-> 3804                 raise KeyError(key) from err
   3805             except TypeError:
   3806                 # If we have a listlike key, _check_indexing_error will raise

KeyError: 'True Positive'
In [ ]:
cat_variables = ['Gender','Uncomplicated Hypertension', 'Complicated Hypertension', 'Uncomplicated Diabetes',
                 'Complicated Diabetes', 'Malignancy', 'Hematologic Disease',
                 'Metastasis', 'Peripheral Vascular Disease', 'Hypothyroidism',
                 'Chronic Heart Failure', 'Stroke', 'Liver Disease','Sepsis', 'Any Organ Failure',
                 'Severe Respiratory Failure',
                 'Severe Coagulation Failure', 'Severe Liver Failure',
                 'Severe Cardiovascular Failure','Severe Central Nervous System Failure', 'Severe Renal Failure',
                 'Respiratory Dysfunction', 'Cardiovascular Dysfunction',
                 'Renal Dysfunction', 'Hematologic Dysfunction', 'Metabolic Dysfunction', 'Neurologic Dysfunction']
df_concat = pd.concat([df_t[df_t['Predicted'] == 1], df_fp], keys=['True Positive', 'False Positive'], names=['Group'])

for category in cat_variables:
  if df_concat.loc['True Positive', category].mode()[0] != df_concat.loc['False Positive', category].mode()[0]:
    print(f'There is the different in mode of {category}')
  print(f"Mode of {category} of True Positive: {df_concat.loc['True Positive', category].mode()[0]} - False Positive: {df_concat.loc['False Positive', category].mode()[0]}")
Mode of Gender of True Positive: 1 - False Positive: 1
Mode of Uncomplicated Hypertension of True Positive: 0 - False Positive: 0
Mode of Complicated Hypertension of True Positive: 0 - False Positive: 0
Mode of Uncomplicated Diabetes of True Positive: 0 - False Positive: 0
Mode of Complicated Diabetes of True Positive: 0 - False Positive: 0
Mode of Malignancy of True Positive: 0 - False Positive: 0
Mode of Hematologic Disease of True Positive: 0 - False Positive: 0
Mode of Metastasis of True Positive: 0 - False Positive: 0
Mode of Peripheral Vascular Disease of True Positive: 0 - False Positive: 0
Mode of Hypothyroidism of True Positive: 0 - False Positive: 0
Mode of Chronic Heart Failure of True Positive: 0 - False Positive: 0
Mode of Stroke of True Positive: 0 - False Positive: 0
Mode of Liver Disease of True Positive: 0 - False Positive: 0
Mode of Sepsis of True Positive: 0 - False Positive: 0
Mode of Any Organ Failure of True Positive: 1 - False Positive: 1
Mode of Severe Respiratory Failure of True Positive: 0 - False Positive: 0
Mode of Severe Coagulation Failure of True Positive: 0 - False Positive: 0
Mode of Severe Liver Failure of True Positive: 0 - False Positive: 0
Mode of Severe Cardiovascular Failure of True Positive: 0 - False Positive: 0
Mode of Severe Central Nervous System Failure of True Positive: 0 - False Positive: 0
Mode of Severe Renal Failure of True Positive: 0 - False Positive: 0
Mode of Respiratory Dysfunction of True Positive: 0 - False Positive: 0
Mode of Cardiovascular Dysfunction of True Positive: 0 - False Positive: 0
Mode of Renal Dysfunction of True Positive: 0 - False Positive: 0
Mode of Hematologic Dysfunction of True Positive: 0 - False Positive: 0
Mode of Metabolic Dysfunction of True Positive: 0 - False Positive: 0
Mode of Neurologic Dysfunction of True Positive: 0 - False Positive: 0
In [ ]:
cat_variables = ['Gender','Uncomplicated Hypertension', 'Complicated Hypertension', 'Uncomplicated Diabetes',
                 'Complicated Diabetes', 'Malignancy', 'Hematologic Disease',
                 'Metastasis', 'Peripheral Vascular Disease', 'Hypothyroidism',
                 'Chronic Heart Failure', 'Stroke', 'Liver Disease','Sepsis', 'Any Organ Failure',
                 'Severe Respiratory Failure',
                 'Severe Coagulation Failure', 'Severe Liver Failure',
                 'Severe Cardiovascular Failure','Severe Central Nervous System Failure', 'Severe Renal Failure',
                 'Respiratory Dysfunction', 'Cardiovascular Dysfunction',
                 'Renal Dysfunction', 'Hematologic Dysfunction', 'Metabolic Dysfunction', 'Neurologic Dysfunction']
df_concat = pd.concat([df_true_pos, df_false_pos], keys=['True Positive', 'False Positive'], names=['Group'])

for category in cat_variables:
    fig, ax = plt.subplots(1, 2, sharey=True)
    #set the figure size
    fig.set_figwidth(15)

    sns.countplot(data = df_concat.loc['True Positive'], x = category, hue = category, ax=ax[0])
    ax[0].set_xlabel('True Positive')
    sns.countplot(data = df_concat.loc['False Positive'], x = category, hue = category, ax=ax[1])
    ax[1].set_xlabel('False Positive')

    # Calculate percentages
    true_pos_count_0 = df_concat[df_concat[category] == 0].loc['True Positive'].shape[0]
    true_pos_count_1 = df_concat[df_concat[category] == 1].loc['True Positive'].shape[0]
    true_pos_total = df_concat.loc['True Positive'].shape[0]

    true_pos_percentage = (abs(true_pos_count_0 - true_pos_count_1) / true_pos_total) * 100

    flase_pos_count_0 = df_concat[df_concat[category] == 0].loc['False Positive'].shape[0]
    flase_pos_count_1 = df_concat[df_concat[category] == 1].loc['False Positive'].shape[0]
    flase_pos_total = df_concat.loc['False Positive'].shape[0]

    flase_pos_percentage = (abs(flase_pos_count_0 - flase_pos_count_1) / flase_pos_total) * 100

    # Annotate the plot with the percentage difference
    ax[0].text(0.5, 0.5, f'Difference: {true_pos_percentage:.2f}%', horizontalalignment='center', verticalalignment='center', transform=ax[0].transAxes, fontsize=12, color='red')
    ax[1].text(0.5, 0.5, f'Difference: {flase_pos_percentage:.2f}%', horizontalalignment='center', verticalalignment='center', transform=ax[1].transAxes, fontsize=12, color='red')

    plt.suptitle(category)

    plt.tight_layout()
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-91-d31c3f7b8044> in <cell line: 12>()
     28 
     29     flase_pos_count_0 = df_concat[df_concat[category] == 0].loc['False Positive'].shape[0]
---> 30     flase_pos_count_1 = df_concat[df_concat[category] == 1].loc['False Positive'].shape[0]
     31     flase_pos_total = df_concat.loc['False Positive'].shape[0]
     32 

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in __getitem__(self, key)
   1071 
   1072             maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073             return self._getitem_axis(maybe_callable, axis=axis)
   1074 
   1075     def _is_scalar_access(self, key: tuple):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1310         # fall thru to straight lookup
   1311         self._validate_key(key, axis)
-> 1312         return self._get_label(key, axis=axis)
   1313 
   1314     def _get_slice_axis(self, slice_obj: slice, axis: int):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in _get_label(self, label, axis)
   1258     def _get_label(self, label, axis: int):
   1259         # GH#5567 this will fail if the label is not present in the axis.
-> 1260         return self.obj.xs(label, axis=axis)
   1261 
   1262     def _handle_lowerdim_multi_index_axis0(self, tup: tuple):

/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
   4047 
   4048         if isinstance(index, MultiIndex):
-> 4049             loc, new_index = index._get_loc_level(key, level=0)
   4050             if not drop_level:
   4051                 if lib.is_integer(loc):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/multi.py in _get_loc_level(self, key, level)
   3158                 return indexer, maybe_mi_droplevels(indexer, ilevels)
   3159         else:
-> 3160             indexer = self._get_level_indexer(key, level=level)
   3161             if (
   3162                 isinstance(key, str)

/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/multi.py in _get_level_indexer(self, key, level, indexer)
   3288             if start == end:
   3289                 # The label is present in self.levels[level] but unused:
-> 3290                 raise KeyError(key)
   3291             return slice(start, end)
   3292 

KeyError: 'False Positive'
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
cat_variables = ['Gender','Uncomplicated Hypertension', 'Complicated Hypertension', 'Uncomplicated Diabetes',
                 'Complicated Diabetes', 'Malignancy', 'Hematologic Disease',
                 'Metastasis', 'Peripheral Vascular Disease', 'Hypothyroidism',
                 'Chronic Heart Failure', 'Stroke', 'Liver Disease','Sepsis', 'Any Organ Failure',
                 'Severe Respiratory Failure',
                 'Severe Coagulation Failure', 'Severe Liver Failure',
                 'Severe Cardiovascular Failure','Severe Central Nervous System Failure', 'Severe Renal Failure',
                 'Respiratory Dysfunction', 'Cardiovascular Dysfunction',
                 'Renal Dysfunction', 'Hematologic Dysfunction', 'Metabolic Dysfunction', 'Neurologic Dysfunction']
df_concat = pd.concat([df_true_neg, df_false_neg], keys=['True Negative', 'False Negative'], names=['Group'])

for category in cat_variables:
    fig, ax = plt.subplots(1, 2, sharey=True)
    #set the figure size
    fig.set_figwidth(15)

    sns.countplot(data = df_concat.loc['True Negative'], x = category, hue = category, ax=ax[0])
    ax[0].set_xlabel('True Negative')
    sns.countplot(data = df_concat.loc['False Negative'], x = category, hue = category, ax=ax[1])
    ax[1].set_xlabel('False Negative')

    # Calculate percentages
    true_pos_count_0 = df_concat[df_concat[category] == 0].loc['True Negative'].shape[0]
    true_pos_count_1 = df_concat[df_concat[category] == 1].loc['True Negative'].shape[0]
    true_pos_total = df_concat.loc['True Negative'].shape[0]

    true_pos_percentage = (abs(true_pos_count_0 - true_pos_count_1) / true_pos_total) * 100

    flase_pos_count_0 = df_concat[df_concat[category] == 0].loc['False Negative'].shape[0]
    flase_pos_count_1 = df_concat[df_concat[category] == 1].loc['False Negative'].shape[0]
    flase_pos_total = df_concat.loc['False Negative'].shape[0]

    flase_pos_percentage = (abs(flase_pos_count_0 - flase_pos_count_1) / flase_pos_total) * 100

    # Annotate the plot with the percentage difference
    ax[0].text(0.5, 0.5, f'Difference: {true_pos_percentage:.2f}%', horizontalalignment='center', verticalalignment='center', transform=ax[0].transAxes, fontsize=12, color='red')
    ax[1].text(0.5, 0.5, f'Difference: {flase_pos_percentage:.2f}%', horizontalalignment='center', verticalalignment='center', transform=ax[1].transAxes, fontsize=12, color='red')

    plt.suptitle(category)

    plt.tight_layout()
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-89-b44c55bc4b64> in <cell line: 12>()
     22     # Calculate percentages
     23     true_pos_count_0 = df_concat[df_concat[category] == 0].loc['True Negative'].shape[0]
---> 24     true_pos_count_1 = df_concat[df_concat[category] == 1].loc['True Negative'].shape[0]
     25     true_pos_total = df_concat.loc['True Negative'].shape[0]
     26 

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in __getitem__(self, key)
   1071 
   1072             maybe_callable = com.apply_if_callable(key, self.obj)
-> 1073             return self._getitem_axis(maybe_callable, axis=axis)
   1074 
   1075     def _is_scalar_access(self, key: tuple):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1310         # fall thru to straight lookup
   1311         self._validate_key(key, axis)
-> 1312         return self._get_label(key, axis=axis)
   1313 
   1314     def _get_slice_axis(self, slice_obj: slice, axis: int):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py in _get_label(self, label, axis)
   1258     def _get_label(self, label, axis: int):
   1259         # GH#5567 this will fail if the label is not present in the axis.
-> 1260         return self.obj.xs(label, axis=axis)
   1261 
   1262     def _handle_lowerdim_multi_index_axis0(self, tup: tuple):

/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
   4047 
   4048         if isinstance(index, MultiIndex):
-> 4049             loc, new_index = index._get_loc_level(key, level=0)
   4050             if not drop_level:
   4051                 if lib.is_integer(loc):

/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/multi.py in _get_loc_level(self, key, level)
   3158                 return indexer, maybe_mi_droplevels(indexer, ilevels)
   3159         else:
-> 3160             indexer = self._get_level_indexer(key, level=level)
   3161             if (
   3162                 isinstance(key, str)

/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/multi.py in _get_level_indexer(self, key, level, indexer)
   3288             if start == end:
   3289                 # The label is present in self.levels[level] but unused:
-> 3290                 raise KeyError(key)
   3291             return slice(start, end)
   3292 

KeyError: 'True Negative'
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
cat_variables = ['Gender','Uncomplicated Hypertension', 'Complicated Hypertension', 'Uncomplicated Diabetes',
                 'Complicated Diabetes', 'Malignancy', 'Hematologic Disease',
                 'Metastasis', 'Peripheral Vascular Disease', 'Hypothyroidism',
                 'Chronic Heart Failure', 'Stroke', 'Liver Disease','Sepsis', 'Any Organ Failure',
                 'Severe Respiratory Failure',
                 'Severe Coagulation Failure', 'Severe Liver Failure',
                 'Severe Cardiovascular Failure','Severe Central Nervous System Failure', 'Severe Renal Failure',
                 'Respiratory Dysfunction', 'Cardiovascular Dysfunction',
                 'Renal Dysfunction', 'Hematologic Dysfunction', 'Metabolic Dysfunction', 'Neurologic Dysfunction']
df_concat = pd.concat([df_true_values, df_false_values], keys=['True Prediction', 'False Negative'], names=['Group'])

for category in cat_variables:
    fig, ax = plt.subplots(1, 2, sharey=True)
    #set the figure size
    fig.set_figwidth(15)

    sns.countplot(data = df_concat.loc['True Prediction'], x = category, hue = category, ax=ax[0])
    ax[0].set_xlabel('True Prediction')
    sns.countplot(data = df_concat.loc['False Negative'], x = category, hue = category, ax=ax[1])
    ax[1].set_xlabel('False Negative')

    # Calculate percentages
    true_pos_count_0 = df_concat[df_concat[category] == 0].loc['True Prediction'].shape[0]
    true_pos_count_1 = df_concat[df_concat[category] == 1].loc['True Prediction'].shape[0]
    true_pos_total = df_concat.loc['True Prediction'].shape[0]

    true_pos_percentage = (abs(true_pos_count_0 - true_pos_count_1) / true_pos_total) * 100

    flase_pos_count_0 = df_concat[df_concat[category] == 0].loc['False Negative'].shape[0]
    flase_pos_count_1 = df_concat[df_concat[category] == 1].loc['False Negative'].shape[0]
    flase_pos_total = df_concat.loc['False Negative'].shape[0]

    flase_pos_percentage = (abs(flase_pos_count_0 - flase_pos_count_1) / flase_pos_total) * 100

    # Annotate the plot with the percentage difference
    ax[0].text(0.5, 0.5, f'Difference: {true_pos_percentage:.2f}%', horizontalalignment='center', verticalalignment='center', transform=ax[0].transAxes, fontsize=12, color='red')
    ax[1].text(0.5, 0.5, f'Difference: {flase_pos_percentage:.2f}%', horizontalalignment='center', verticalalignment='center', transform=ax[1].transAxes, fontsize=12, color='red')

    plt.suptitle(category)

    plt.tight_layout()
<ipython-input-88-345a9b7a41d2>:13: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`.
  fig, ax = plt.subplots(1, 2, sharey=True)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Bar Plot Categorical Variables¶

In [ ]:
cat_variables = ['Gender', 'Uncomplicated Hypertension', 'Complicated Hypertension', 'Uncomplicated Diabetes',
                 'Complicated Diabetes', 'Malignancy', 'Hematologic Disease',
                 'Metastasis', 'Peripheral Vascular Disease', 'Hypothyroidism',
                 'Chronic Heart Failure', 'Stroke', 'Liver Disease', 'Sepsis', 'Any Organ Failure',
                 'Severe Respiratory Failure',
                 'Severe Coagulation Failure', 'Severe Liver Failure',
                 'Severe Cardiovascular Failure', 'Severe Central Nervous System Failure', 'Severe Renal Failure',
                 'Respiratory Dysfunction', 'Cardiovascular Dysfunction',
                 'Renal Dysfunction', 'Hematologic Dysfunction', 'Metabolic Dysfunction', 'Neurologic Dysfunction']

# Concatenate all dataframes
df_concat = pd.concat([df_true_pos, df_false_pos, df_true_neg, df_false_neg, df_true_values, df_false_values],
                      keys=['True Positive', 'False Positive', 'True Negative', 'False Negative', 'True Values', 'False Values'],
                      names=['Group'])

fig, axes = plt.subplots(len(cat_variables), 6, figsize=(26, 6 * len(cat_variables)))
fig.suptitle('Comparison of Categorical Variables', fontsize=38, fontweight='bold')

for i, category in enumerate(cat_variables):
    axes[i, 2].text(0.5, 1.1, category, ha='center', va='bottom', transform=axes[i, 2].transAxes, fontsize=18, fontweight='bold')

    # True Positive
    sns.countplot(data=df_concat.loc['True Positive'], x=category, hue=category, ax=axes[i, 0])
    axes[i, 0].set_title('True Positive')
    axes[i, 0].set_xlabel('')
    axes[i, 0].set_ylabel('')
    axes[i, 0].legend().remove()

    # False Positive
    sns.countplot(data=df_concat.loc['False Positive'], x=category, hue=category, ax=axes[i, 1])
    axes[i, 1].set_title('False Positive')
    axes[i, 1].set_xlabel('')
    axes[i, 1].set_ylabel('')
    axes[i, 1].legend().remove()

    # True Negative
    sns.countplot(data=df_concat.loc['True Negative'], x=category, hue=category, ax=axes[i, 2])
    axes[i, 2].set_title('True Negative')
    axes[i, 2].set_xlabel('')
    axes[i, 2].set_ylabel('')
    axes[i, 2].legend().remove()

    # False Negative
    sns.countplot(data=df_concat.loc['False Negative'], x=category, hue=category, ax=axes[i, 3])
    axes[i, 3].set_title('False Negative')
    axes[i, 3].set_xlabel('')
    axes[i, 3].set_ylabel('')
    axes[i, 3].legend().remove()

    # True Values
    sns.countplot(data=df_concat.loc['True Values'], x=category, hue=category, ax=axes[i, 4])
    axes[i, 4].set_title('True Values')
    axes[i, 4].set_xlabel('')
    axes[i, 4].set_ylabel('')
    axes[i, 4].legend().remove()

    # False Values
    sns.countplot(data=df_concat.loc['False Values'], x=category, hue=category, ax=axes[i, 5])
    axes[i, 5].set_title('False Values')
    axes[i, 5].set_xlabel('')
    axes[i, 5].set_ylabel('')
    axes[i, 5].legend().remove()

    # Calculate percentage difference for each group
    for j, group in enumerate(['True Positive', 'False Positive', 'True Negative', 'False Negative', 'True Values', 'False Values']):
        try:
            count_0 = df_concat[df_concat[category] == 0].loc[group].shape[0]
            count_1 = df_concat[df_concat[category] == 1].loc[group].shape[0]
            total = df_concat.loc[group].shape[0]
            percentage = (abs(count_0 - count_1) / total) * 100
        except KeyError:
            percentage = 100  # Assign a default value if KeyError occurs

        axes[i, j].text(0.5, 0.5, f'Difference: {percentage:.2f}%', horizontalalignment='center', verticalalignment='center', transform=axes[i, j].transAxes, fontsize=12, color='black', fontweight='bold')

plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()
No description has been provided for this image